{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import catboost as cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "def reduce_mem_usage(df):\n",
    "    \"\"\" iterate through all the columns of a dataframe and modify the data type\n",
    "        to reduce memory usage.        \n",
    "    \"\"\"\n",
    "    start_mem = df.memory_usage().sum() \n",
    "    for col in df.columns:\n",
    "        col_type = df[col].dtype\n",
    "        \n",
    "        if col_type != object:\n",
    "            c_min = df[col].min()\n",
    "            c_max = df[col].max()\n",
    "            if str(col_type)[:3] == 'int':\n",
    "                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:\n",
    "                    df[col] = df[col].astype(np.int8)\n",
    "                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:\n",
    "                    df[col] = df[col].astype(np.int16)\n",
    "                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:\n",
    "                    df[col] = df[col].astype(np.int32)\n",
    "                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:\n",
    "                    df[col] = df[col].astype(np.int64)  \n",
    "            else:\n",
    "                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:\n",
    "                    df[col] = df[col].astype(np.float16)\n",
    "                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:\n",
    "                    df[col] = df[col].astype(np.float32)\n",
    "                else:\n",
    "                    df[col] = df[col].astype(np.float64)\n",
    "        else:\n",
    "            df[col] = df[col].astype('category')\n",
    "\n",
    "    end_mem = df.memory_usage().sum() \n",
    "    return df\n",
    "\n",
    "def load_data(path):\n",
    "    user = reduce_mem_usage(pd.read_csv(path + 'user.csv',header=None))\n",
    "    item = reduce_mem_usage(pd.read_csv(path + 'item.csv',header=None))\n",
    "    data = pd.read_csv(path + 'user_behavior.csv',header=None)\n",
    "\n",
    "    data.columns = ['userID','itemID','behavior','timestamp']\n",
    "    data['day'] = data['timestamp'] // 86400\n",
    "    data['hour'] = data['timestamp'] // 3600 % 24\n",
    "    \n",
    "    ## 生成behavior的onehot\n",
    "    for i in ['pv','fav','cart','buy']:\n",
    "        data[i] = 0\n",
    "        data.loc[data['behavior'] == i, i] = 1\n",
    "\n",
    "    ## 生成behavior的加权\n",
    "    \n",
    "    data['day_hour'] = data['day'] + data['hour'] / float(24)\n",
    "    data.loc[data['behavior']=='pv','behavior'] = 1\n",
    "    data.loc[data['behavior']=='fav','behavior'] = 2\n",
    "    data.loc[data['behavior']=='cart','behavior'] = 3\n",
    "    data.loc[data['behavior']=='buy','behavior'] = 1\n",
    "    max_day = max(data['day'])\n",
    "    min_day = min(data['day'])\n",
    "    data['behavior'] = (1 - (max_day-data['day_hour']+2)/(max_day-min_day+2)) * data['behavior'] \n",
    "\n",
    "    item.columns = ['itemID','category','shop','brand']\n",
    "    user.columns = ['userID','sex','age','ability']\n",
    "    \n",
    "    data = reduce_mem_usage(data)\n",
    "\n",
    "    data = pd.merge(left=data, right=item, on='itemID',how='left')\n",
    "    data = pd.merge(left=data, right=user, on='userID',how='left')\n",
    "\n",
    "    return user, item, data\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "user, item, data = load_data(path = '../ECommAI_EUIR_round1_testA_20190701/')\n",
    "user['age'] = user['age'] // 10\n",
    "data['age'] = data['age'] // 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_unique_inorder(x, k=50):\n",
    "    result = []\n",
    "    flag = set()\n",
    "    for i in x:\n",
    "        if i[0] not in flag:\n",
    "            result.append(i)\n",
    "            flag.add(i[0])\n",
    "        if len(flag) > k:\n",
    "            break\n",
    "    return result\n",
    "\n",
    "def get_recall_list(train, targetDay, k=300):\n",
    "    train_logs = dict()\n",
    "    path = './'\n",
    "    f = open(path + 'upward_map.txt','r')\n",
    "    upward_map = f.read()\n",
    "    upward_map = eval(upward_map)\n",
    "    f.close()\n",
    "    \n",
    "    f = open(path + 'downward_map.txt','r')\n",
    "    downward_map = f.read()\n",
    "    downward_map = eval(downward_map)\n",
    "    f.close()\n",
    "    \n",
    "\n",
    "    f = open(path + 'item_Apriori.txt','r')\n",
    "    tmp = f.read()\n",
    "    item_dict = eval(tmp)\n",
    "    f.close()\n",
    "    \n",
    "    if targetDay > max(train['day']):\n",
    "        for row in train[['userID','itemID','behavior']].values:\n",
    "            train_logs.setdefault(row[0], dict())\n",
    "            if row[1] in upward_map:\n",
    "                train_logs[row[0]].setdefault(upward_map[row[1]],0)\n",
    "                train_logs[row[0]][upward_map[row[1]]] = max(train_logs[row[0]][upward_map[row[1]]],row[2])\n",
    "    else:\n",
    "        user_List_test = set(train.loc[train['day']==targetDay,'userID'])\n",
    "        train = train[train['day'] < targetDay]\n",
    "        \n",
    "        for row in train[['userID','itemID','behavior']].values:\n",
    "            if row[0] in user_List_test:\n",
    "                train_logs.setdefault(row[0], dict())\n",
    "                if row[1] in upward_map:\n",
    "                    train_logs[row[0]].setdefault(upward_map[row[1]],0)\n",
    "                    train_logs[row[0]][upward_map[row[1]]] = max(train_logs[row[0]][upward_map[row[1]]],row[2])\n",
    "\n",
    "    for each_user in train_logs:\n",
    "        sum_value = sum(train_logs[each_user].values())\n",
    "        if sum_value > 0:\n",
    "            for each_item in train_logs[each_user]:\n",
    "                train_logs[each_user][each_item] /= sum_value            \n",
    "\n",
    "    result_logs = dict()    \n",
    "    for u in train_logs:\n",
    "        result_logs.setdefault(u, list())\n",
    "        for i in set(train_logs[u].keys()):\n",
    "            if i in item_dict:\n",
    "                tmp_list = [ (x[0], train_logs[u][i]*x[1]) for x in item_dict[i]]\n",
    "                result_logs[u] += tmp_list\n",
    "            \n",
    "    for u in result_logs:\n",
    "        result_logs[u] = get_unique_inorder([(downward_map[x[0]], x[1]) for x in sorted(result_logs[u], key=lambda x:x[1], reverse=True)\n",
    "                          if x[0] not in train_logs[u]], k=k)  \n",
    "    \n",
    "    return result_logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = data[data['day'] < 15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_pairs(recall):\n",
    "    result = []\n",
    "    for u in recall:\n",
    "        rank = 0\n",
    "        lenth = len(recall[u])\n",
    "        for i in recall[u]:\n",
    "            result.append([u,i[0],i[1], rank, rank/lenth])\n",
    "            rank += 1\n",
    "    return result\n",
    "\n",
    "def reshape_recall_to_dataframe(recall):\n",
    "    result = generate_pairs(recall)\n",
    "    result = pd.DataFrame(result)\n",
    "    result.columns = ['userID','itemID','apriori', 'apriori_rank', 'apriori_top']\n",
    "    return result\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "#test_recall_logs = get_recall_list(data, targetDay = 15, k=500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "#test_recall = reshape_recall_to_dataframe(test_recall_logs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_recall = pd.read_csv('recall_list_testA_15day_300lenth.csv')\n",
    "test_recall = pd.merge(left=test_recall, right=user, on='userID',how='left')\n",
    "test_recall = pd.merge(left=test_recall, right=item, on='itemID',how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "underline_features_files = [\n",
    "'brand_count.csv',\n",
    "'brand_sum.csv',\n",
    "'category_count.csv',\n",
    "'category_sum.csv',\n",
    "'itemID_count.csv',\n",
    "'itemID_sum.csv',\n",
    "'shop_count.csv',\n",
    "'shop_sum.csv',\n",
    "'category_lower.csv',\n",
    "'item_rank.csv',\n",
    "'category_higher.csv',\n",
    "'itemID_higher.csv',\n",
    "]\n",
    "\n",
    "underline_features = []\n",
    "for f in underline_features_files:\n",
    "    underline_features.append(reduce_mem_usage(pd.read_csv(f)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "for f in underline_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=f.columns[0], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "double_underline_features_files = [\n",
    "'item_to_ability_count_underline.csv',\n",
    "'item_to_sex_count_underline.csv',\n",
    "'item_to_age_count_underline.csv',\n",
    "]\n",
    "\n",
    "double_underline_features = []\n",
    "for f in double_underline_features_files:\n",
    "    double_underline_features.append(pd.read_csv(f, engine='c'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "for f in double_underline_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=list(f.columns[0: 2]), how='left', sort=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_online_features(data):\n",
    "    online_features = []\n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        online_features.append(data[['behavior','userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "            {'behavior': 'count'}).rename(columns={'behavior':'user_to_'\n",
    "                                                   + count_feature + '_count'}))\n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        online_features.append(data[['behavior','userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "            {'behavior': 'sum'}).rename(columns={'behavior':'user_to_' \n",
    "                                                 + count_feature + '_sum'}))\n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        for behavior_type in ['pv','buy']:\n",
    "            online_features.append(data[[behavior_type,'userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "                {behavior_type: 'sum'}).rename(columns={behavior_type:'user_to_'\n",
    "                                                       + count_feature + '_count_' + behavior_type}))\n",
    "\n",
    "    return online_features\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "online_features = generate_online_features(test)\n",
    "\n",
    "for f in online_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=list(f.columns[0: 2]), how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_yestday_features(data):\n",
    "    yestday_features = []\n",
    "    yestday = data[data['day'] == 14]\n",
    "    \n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        yestday_features.append(yestday[['behavior','userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "            {'behavior': 'count'}).rename(columns={'behavior':'user_to_'\n",
    "                                                   + count_feature + '_count_yestday'}))\n",
    "\n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        for behavior_type in ['pv','buy']:\n",
    "            yestday_features.append(yestday[[behavior_type,'userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "                {behavior_type: 'sum'}).rename(columns={behavior_type:'user_to_'\n",
    "                                                       + count_feature + '_count_'+behavior_type+'_yestday'}))\n",
    "    return yestday_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_5days_features(data):\n",
    "    a5days = data[(data['day'] > 15 - 5) & (data['day'] < 15)]\n",
    "    five_days_features = []\n",
    "    \n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        five_days_features.append(a5days[['behavior','userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "            {'behavior': 'count'}).rename(columns={'behavior':'user_to_'\n",
    "                                                   + count_feature + '_count_5days'}))\n",
    "\n",
    "    for count_feature in ['category','shop','brand']:\n",
    "        for behavior_type in ['pv','fav','cart','buy']:\n",
    "            five_days_features.append(a5days[[behavior_type,'userID',count_feature]].groupby(['userID', count_feature], as_index=False).agg(\n",
    "                {behavior_type: 'sum'}).rename(columns={behavior_type:'user_to_'\n",
    "                                                       + count_feature + '_count_' + behavior_type+'_5days'}))\n",
    "    return five_days_features\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "yestday_features = generate_yestday_features(test)\n",
    "\n",
    "for f in yestday_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=list(f.columns[0: 2]), how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "five_days_features = generate_5days_features(test)\n",
    "\n",
    "for f in five_days_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=list(f.columns[0: 2]), how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "time_features_files = [\n",
    "'itemID_last_time_underline.csv',\n",
    "'brand_last_time_underline.csv',\n",
    "'shop_last_time_underline.csv'\n",
    "]\n",
    "\n",
    "\n",
    "time_features = []\n",
    "for f in time_features_files:\n",
    "    time_features.append(reduce_mem_usage(pd.read_csv(f)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "for f in time_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=f.columns[0], how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_dynamic_features(data):\n",
    "    dynamic_time_features = []\n",
    "    test = data[data['day'] < 15]\n",
    "    start_timestamp  = max(test['timestamp'])\n",
    "    test['lasttime'] = start_timestamp - test['timestamp']\n",
    "    \n",
    "    for dynamic_time_feature in ['shop', 'category','brand']:\n",
    "        dynamic_time_features.append(test[['lasttime','userID',dynamic_time_feature,'day']].groupby(['userID',dynamic_time_feature], as_index=False).agg({'lasttime': 'min', 'day':'max'}).rename(columns={'lasttime': 'user_to_'\n",
    "                                                       + dynamic_time_feature + '_lasttime', 'day':'user_to_'+ dynamic_time_feature + '_lastday'}))\n",
    "    return dynamic_time_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "dynamic_time_features = generate_dynamic_features(test)\n",
    "for f in dynamic_time_features:\n",
    "    test_recall = pd.merge(left=test_recall, right=f, on=list(f.columns[0: 2]), how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "cbt_model = cat.CatBoostClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "cbt_model = cbt_model.load_model('model0924_base.file')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "## About Model Averaging\n",
    "\n",
    "## WE DIDNT TEST MODEL AVERAGING OFFLINE, BUT WE APPLIED IT ONLINE\n",
    "## PLEASE SEE FILE IN /CIKM-2019-AnalytiCup/Semi-Finals/online_recommendation/\n",
    "\n",
    "#0.045965784783714\n",
    "# test_recall['ensemble'] = 10 / ( 5/test_recall['label_lgb'] + 5/test_recall['label'])\n",
    "\n",
    "# #0.045943749548558184\n",
    "# test_recall['ensemble_power'] = np.power( test_recall['label_lgb']**4.8 * test_recall['label']**5.2 , 1/10)\n",
    "\n",
    "# #0.045996441844155474\n",
    "# test_recall['ensemble_final'] = test_recall['ensemble']*0.5 + test_recall['ensemble_power'] * 0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = [x for x in test_recall.columns if x not in ['itemID','userID','category','shop','brand','label']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_recall['label'] = cbt_model.predict_proba(test_recall[features])[:,1]\n",
    "\n",
    "train_logs = dict()\n",
    "train_ = data[data['day'] < 15]\n",
    "for row in train_[['userID','itemID']].values:\n",
    "    train_logs.setdefault(row[0], [])\n",
    "    train_logs[row[0]].append(row[1])\n",
    "\n",
    "test_logs = dict()\n",
    "test_ = data[data['day'] == 15]\n",
    "for row in test_[['userID','itemID']].values:\n",
    "    test_logs.setdefault(row[0], [])\n",
    "    test_logs[row[0]].append(row[1])\n",
    "    \n",
    "\n",
    "result_logs = dict()\n",
    "test_recall = test_recall.sort_values('label', ascending=False).reset_index(drop=True)\n",
    "for row in test_recall[['userID','itemID']].values:\n",
    "    result_logs.setdefault(row[0], [])\n",
    "    if len(result_logs[row[0]]) < 50:\n",
    "        result_logs[row[0]].append(row[1])\n",
    "\n",
    "temp = data.groupby(['itemID'], as_index=False).count()[['itemID','userID']]\n",
    "hot_items = list(temp.sort_values('userID', ascending=False).reset_index(drop=True)['itemID'][:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = time.time()\n",
    "rec_dict = dict()\n",
    "for u in set(data['userID']):\n",
    "    if u in result_logs:\n",
    "        lenth = len(result_logs[u])\n",
    "        if lenth < 50:\n",
    "            rec_dict[u] = result_logs[u] + [x for x in hot_items if x not in result_logs[u] and x not in train_logs[u]][:50 - lenth]\n",
    "        else:\n",
    "            rec_dict[u] = result_logs[u]\n",
    "    else:\n",
    "        rec_dict[u] = [x for x in hot_items][:50]\n",
    "b=time.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.221940040588379"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b - a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recall(dict1, dict2, train_dict):\n",
    "    '''\n",
    "    dict1 是真值 dict2 是预测值.\n",
    "    '''\n",
    "    \n",
    "    result = 0\n",
    "    count = 0\n",
    "    for i in dict1:\n",
    "        if i in dict2 and i in train_dict:\n",
    "            new_item = set()\n",
    "    \n",
    "            for k in dict1[i]:\n",
    "                if k not in train_dict[i]:\n",
    "                    new_item.add(k)\n",
    "            if new_item:\n",
    "                result += len(new_item & set(dict2[i])) / len(new_item)\n",
    "                count += 1\n",
    "            \n",
    "    if count == 0:\n",
    "        return 0\n",
    "    else:\n",
    "        return result / count\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.04542858771389073"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recall(test_logs, rec_dict, train_logs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'test_recall_logs' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-31-a84d35211ea1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrecall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_logs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtest_recall_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtest_recall_logs\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'test_recall_logs' is not defined"
     ]
    }
   ],
   "source": [
    "recall(test_logs, {x:[x[0] for x in test_recall_logs[x]] for x in test_recall_logs}, train_logs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
